package com.jiaz.scripts;

import com.csvreader.CsvWriter;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.regex.Pattern;
import org.springframework.stereotype.Repository;


/**
 * Data-source cleanup script.
 *
 * <p>Reads the raw check-in text file and rewrites it as a comma-separated CSV file
 * with a header row.
 *
 * @author jiajiazi
 * @version 1.0
 * @date 2022/4/23 16:04
 */
@Repository
public class OriginFile {

    /**
     * Field separator of the raw file: a tab or a literal question mark.
     * Compiled once so the regex is not rebuilt for every input line.
     */
    private static final Pattern FIELD_SEPARATOR = Pattern.compile("\t|\\?");

    /**
     * Converts the source txt file into a csv file so that it is easier to manage.
     *
     * <p>The input is decoded as UTF-8 regardless of the platform default charset, so the
     * source file must be converted to UTF-8 beforehand. The output is written as UTF-8 too.
     * Any {@link IOException} raised while reading or writing is printed to standard error;
     * the CSV writer is closed in every case.
     *
     * @param args command-line arguments (unused; input and output paths are hard-coded)
     */
    public static void main(String[] args) {
        String path = "D:\\Miwork\\jiazi\\POI_SOURCE\\dataset_tsmc2014\\";
        String readFile = path + "dataset_TSMC2014_TKY.txt";
        String writeFile = path + "checkin_tky_2014.csv";
        String[] head = {"user_id", "venue_id1", "venue_category_id", "venue_category_name", "lat", "lon", "utc_offset", "create_time"};
        CsvWriter csvWriter = new CsvWriter(writeFile, ',', StandardCharsets.UTF_8);

        // try-with-resources closes the reader (and the underlying file stream) on every path,
        // including when a read or a write fails half-way through.
        try (BufferedReader bufferedReader = new BufferedReader(
                new InputStreamReader(new FileInputStream(new File(readFile)), StandardCharsets.UTF_8))) {
            csvWriter.writeRecord(head);
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                // One CSV record per input line; fields are split on a tab or a literal '?'.
                csvWriter.writeRecord(FIELD_SEPARATOR.split(line));
            }
            csvWriter.flush();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Always release the CSV writer, even when the conversion failed.
            csvWriter.close();
        }

    }

}

















